"""
Phase 3: Portfolio Allocation
==============================
Tests whether aging countries tilt portfolios toward safe assets.
Z → debt_share (debt/gross assets), Z → safe_share (bilateral to safe issuers).
KAOPEN interactions, age decomposition, income terciles.

Output: table3_portfolio_allocation.md
"""

import sys
from pathlib import Path

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Absolute root of this sub-project; the sibling projects below are resolved
# relative to its parent directory.
PROJECT_DIR = Path("/mnt/c/demographics_capital_flows/safe_assets")
MULTILATERAL_DIR = PROJECT_DIR.parent / "multilateral"
GRAVITY_DIR = PROJECT_DIR.parent / "gravity_bilateral"
# The multilateral project's src/ directory is put on sys.path before the
# import below — presumably that is where the `model` module providing
# PanelGLS lives, so the statement order matters.
sys.path.insert(0, str(MULTILATERAL_DIR / "src"))
from model import PanelGLS

# Location of the input panel (safe_asset_panel.csv) and destination directory
# for the generated markdown table.
PROCESSED_DIR = PROJECT_DIR / "data" / "processed"
TABLES_DIR = PROJECT_DIR / "output" / "tables"

# The 38 country codes that define the OECD subsample; matched against the
# panel's `iso3` column.
OECD_38 = [
    "AUS", "AUT", "BEL", "CAN", "CHL", "COL", "CRI", "CZE", "DNK", "EST",
    "FIN", "FRA", "DEU", "GRC", "HUN", "ISL", "IRL", "ISR", "ITA", "JPN",
    "KOR", "LVA", "LTU", "LUX", "MEX", "NLD", "NZL", "NOR", "POL", "PRT",
    "SVK", "SVN", "ESP", "SWE", "CHE", "TUR", "GBR", "USA",
]

# Control regressors appended to the specifications; main() keeps only those
# that are actually present as columns in the loaded panel.
CONTROLS = ['rgdp_growth', 'inflation', 'fiscal_bal_gdp', 'kaopen', 'nfa_gdp_lag']


def stars(p):
    """Map a p-value to its significance marker.

    Returns '***' below 0.01, '**' below 0.05, '*' below 0.10 and an empty
    string otherwise (including when no comparison succeeds, e.g. NaN).
    """
    # Thresholds are checked from strictest to loosest; the first hit wins.
    for cutoff, marker in ((0.01, '***'), (0.05, '**'), (0.10, '*')):
        if p < cutoff:
            return marker
    return ''


def run_model(df, dep_var, regressors, label, feature_names=None):
    """Fit PanelGLS for one specification and collect its results.

    Args:
        df: Panel containing ``iso3`` and ``year`` columns plus the model
            variables.
        dep_var: Name of the dependent-variable column.
        regressors: Column names used as right-hand-side variables.
        label: Model label used in console output and stored in the result.
        feature_names: Optional display names, positionally parallel to
            ``regressors``. Defaults to the regressor names themselves.

    Returns:
        A dict with ``label``, ``dep_var``, ``n_obs``, ``n_countries``,
        ``r_squared``, ``rho`` and, per regressor name, ``coef_<name>``,
        ``se_<name>`` and ``p_<name>``; or ``None`` when the model is skipped
        (dependent variable missing, no regressor available, or fewer than
        50 complete observations).
    """
    regressors = list(regressors)
    names = list(feature_names) if feature_names else list(regressors)
    if len(names) != len(regressors):
        # Display names that cannot be paired one-to-one with the regressors
        # would mislabel coefficients, so fall back to the column names.
        names = list(regressors)

    missing = {c for c in [dep_var] + regressors if c not in df.columns}
    if missing:
        print(f"  [{label}] Missing columns: {missing}")
        if dep_var not in df.columns:
            print(f"  [{label}] Dep var {dep_var} missing — skipping")
            return None
        # Drop unavailable regressors together with their display names so
        # that position i in `names` still describes column i of the design
        # matrix handed to the estimator.
        kept = [(r, n) for r, n in zip(regressors, names) if r in df.columns]
        regressors = [r for r, _ in kept]
        names = [n for _, n in kept]

    if not regressors:
        print(f"  [{label}] No regressors available — skipping")
        return None

    sub = df.dropna(subset=[dep_var] + regressors).copy()
    if len(sub) < 50:
        print(f"  [{label}] Insufficient obs ({len(sub)}) — skipping")
        return None

    gls = PanelGLS()
    gls.fit(sub[dep_var].values, sub[regressors].values,
            sub['iso3'].values, sub['year'].values)

    print(f"\n  [{label}]  N={gls.n_obs}, countries={gls.n_countries}, "
          f"R²={gls.r_squared:.4f}, rho={gls.rho:.3f}")

    results = {
        'label': label,
        'dep_var': dep_var,
        'n_obs': gls.n_obs,
        'n_countries': gls.n_countries,
        'r_squared': gls.r_squared,
        'rho': gls.rho,
    }
    # NOTE(review): assumes gls.beta / gls.se / gls.pvalues are ordered like
    # the columns of the regressor matrix passed to fit() — confirm in PanelGLS.
    for i, name in enumerate(names):
        results[f'coef_{name}'] = gls.beta[i]
        results[f'se_{name}'] = gls.se[i]
        results[f'p_{name}'] = gls.pvalues[i]
        sig = stars(gls.pvalues[i])
        print(f"    {name:<25} {gls.beta[i]:>8.4f} ({gls.se[i]:.4f}) {sig}")

    return results


def construct_safe_share(df):
    """Attach ``safe_share`` to the country-year panel.

    ``safe_share`` is, per origin country and year, the sum of positive
    bilateral ``portfolio_debt`` held in destinations flagged
    ``safe_issuer == 1`` divided by the sum of all positive bilateral
    ``portfolio_debt``. Destinations without a ``safe_issuer`` value for the
    year are counted as not safe.

    Args:
        df: Panel with ``iso3``, ``year`` and ``safe_issuer`` columns.

    Returns:
        ``df`` with a ``safe_share`` column merged in (NaN where the origin
        has no positive bilateral holdings that year). ``df`` is returned
        unchanged when bilateral_panel.csv or the ``safe_issuer`` column is
        unavailable.
    """
    bilateral_path = GRAVITY_DIR / "data" / "processed" / "bilateral_panel.csv"
    if not bilateral_path.exists():
        print("  bilateral_panel.csv not found — skipping safe_share construction")
        return df
    if 'safe_issuer' not in df.columns:
        # Without the safe-issuer flag every destination would be unsafe;
        # skip instead of failing so downstream sections are simply omitted.
        print("  safe_issuer column not found — skipping safe_share construction")
        return df

    print("  Loading bilateral panel for safe_share ...")
    # Only the columns used below are loaded.
    bp = pd.read_csv(bilateral_path,
                     usecols=['iso_o', 'iso_d', 'year', 'portfolio_debt'])

    # Safe-issuer status of each destination-year. De-duplicating on the merge
    # keys keeps the merge many-to-one, so bilateral rows can never be
    # multiplied (and debt double-counted) by conflicting duplicate ratings.
    ratings = (df[['iso3', 'year', 'safe_issuer']]
               .rename(columns={'iso3': 'iso_d', 'safe_issuer': 'dest_safe'})
               .drop_duplicates(subset=['iso_d', 'year']))
    bp = bp.merge(ratings, on=['iso_d', 'year'], how='left')
    bp['dest_safe'] = bp['dest_safe'].fillna(0)

    # Keep strictly positive holdings only (NaN fails the comparison as well).
    bp_valid = bp[bp['portfolio_debt'] > 0].copy()

    # Vectorised numerator: holdings in safe destinations, zero elsewhere.
    bp_valid['safe_debt'] = bp_valid['portfolio_debt'].where(
        bp_valid['dest_safe'] == 1, 0.0)
    origin_year = (bp_valid
                   .groupby(['iso_o', 'year'], as_index=False)
                   .agg(total_debt=('portfolio_debt', 'sum'),
                        safe_debt=('safe_debt', 'sum')))

    # total_debt is a sum of strictly positive values, so it is never zero.
    origin_year['safe_share'] = origin_year['safe_debt'] / origin_year['total_debt']

    # A stale safe_share column would make the merge emit _x/_y suffixes and
    # leave no 'safe_share' column at all; replace it instead.
    if 'safe_share' in df.columns:
        df = df.drop(columns='safe_share')

    df = df.merge(
        origin_year[['iso_o', 'year', 'safe_share']].rename(columns={'iso_o': 'iso3'}),
        on=['iso3', 'year'], how='left')

    n = df['safe_share'].notna().sum()
    print(f"  safe_share constructed: {n:,} non-null, mean={df['safe_share'].mean():.3f}")
    return df


def main():
    """Run every Phase 3 specification and write the results table.

    Loads safe_asset_panel.csv, adds safe_share and the Z×KAOPEN interaction
    columns, then fits sections A–F in order (debt share, safe share, equity
    share, age decomposition, debt-equity ratio, income terciles). Each
    successful fit is collected and finally handed to build_table().
    """
    rule = "=" * 70
    print(rule)
    print("PHASE 3: Portfolio Allocation")
    print(rule)

    df = pd.read_csv(PROCESSED_DIR / "safe_asset_panel.csv")
    print(f"Panel: {df['iso3'].nunique()} countries, {len(df):,} obs")

    # safe_share comes from the bilateral data set
    df = construct_safe_share(df)

    # Interactions of each demographic factor with capital-account openness
    demo_vars = ['Z_1', 'Z_2', 'Z_3']
    if 'kaopen' in df.columns:
        for factor in demo_vars:
            if factor in df.columns:
                df[f'{factor}_x_kaopen'] = df[factor] * df['kaopen']

    all_results = []
    controls = [c for c in CONTROLS if c in df.columns]
    base_spec = demo_vars + controls

    def section(title):
        # Console banner separating the sections of the run log.
        print("\n" + "─" * 60)
        print(title)
        print("─" * 60)

    def fit(frame, dep_var, regs, label):
        # Fit one specification; keep the result only if the model ran.
        outcome = run_model(frame, dep_var, regs, label, regs)
        if outcome:
            all_results.append(outcome)

    # ---------------- A: debt share (tilt toward bonds) ----------------
    section("A. Debt Share of Gross Assets")

    fit(df, 'debt_share', base_spec, "M1a: Z → debt_share")

    oecd = df[df['iso3'].isin(OECD_38)].copy()
    fit(oecd, 'debt_share', base_spec, "M1b: OECD Z → debt_share")

    non_oecd = df[~df['iso3'].isin(OECD_38)].copy()
    fit(non_oecd, 'debt_share', base_spec, "M1c: Non-OECD Z → debt_share")

    int_avail = [v for v in ('Z_1_x_kaopen', 'Z_2_x_kaopen', 'Z_3_x_kaopen')
                 if v in df.columns]
    if int_avail:
        fit(df, 'debt_share', base_spec + int_avail, "M1d: Z×KAOPEN → debt_share")

    # ---------------- B: safe share (allocation to safe issuers) -------
    section("B. Safe Share (Debt to Safe Issuers / Total Debt)")

    if 'safe_share' in df.columns:
        fit(df, 'safe_share', base_spec, "M2a: Z → safe_share")
        fit(oecd, 'safe_share', base_spec, "M2b: OECD Z → safe_share")
        if int_avail:
            fit(df, 'safe_share', base_spec + int_avail, "M2c: Z×KAOPEN → safe_share")

    # ---------------- C: equity share (complement of debt share) -------
    section("C. Equity Share of Gross Assets")

    if 'equity_share' in df.columns:
        fit(df, 'equity_share', base_spec, "M3a: Z → equity_share")
        fit(oecd, 'equity_share', base_spec, "M3b: OECD Z → equity_share")

    # ---------------- D: age decomposition ------------------------------
    section("D. Age Decomposition (old_dep vs youth_dep)")

    age_spec = ['old_dep', 'youth_dep'] + controls
    for dep_var in ('debt_share', 'safe_share', 'equity_share'):
        if dep_var not in df.columns:
            continue
        fit(df, dep_var, age_spec, f"M4: age → {dep_var}")

    # ---------------- E: debt-equity ratio ------------------------------
    section("E. Debt-Equity Ratio (Portfolio Tilt Intensity)")

    if 'debt_equity_ratio' in df.columns:
        fit(df, 'debt_equity_ratio', base_spec, "M5a: Z → debt/equity ratio")
        fit(oecd, 'debt_equity_ratio', base_spec, "M5b: OECD Z → debt/equity")

    # ---------------- F: income terciles --------------------------------
    section("F. Income Tercile Heterogeneity")

    if 'gdp_pc_ppp' in df.columns and 'debt_share' in df.columns:
        # Countries are ranked by their median GDP per capita over the sample
        country_income = df.groupby('iso3')['gdp_pc_ppp'].median()
        cuts = country_income.quantile([1/3, 2/3])
        lower, upper = cuts.iloc[0], cuts.iloc[1]

        in_low = country_income <= lower
        in_mid = (country_income > lower) & (country_income <= upper)
        in_high = country_income > upper
        groups = (
            ('Low', country_income[in_low].index),
            ('Mid', country_income[in_mid].index),
            ('High', country_income[in_high].index),
        )

        for tercile_name, countries in groups:
            members = df[df['iso3'].isin(countries)].copy()
            fit(members, 'debt_share', base_spec,
                f"M6_{tercile_name}: Z → debt_share")

    # Results table
    print("\n\nBuilding results table ...")
    build_table(all_results)

    print("\n" + rule)
    print("Phase 3 complete.")
    print(rule)


def build_table(all_results):
    """Write the markdown results table for the fitted models.

    Args:
        all_results: List of result dicts as returned by run_model(). Each
            needs ``label``, ``dep_var``, ``n_obs``, ``n_countries``,
            ``r_squared`` and ``rho``; ``coef_/se_/p_`` entries are reported
            for the key demographic variables that are present.

    Returns:
        None. Writes ``table3_portfolio_allocation.md`` into TABLES_DIR, or
        only prints a notice when ``all_results`` is empty.
    """
    if not all_results:
        print("  No results to tabulate.")
        return

    # Only these variables are reported; control coefficients are omitted.
    key_vars = ['Z_1', 'Z_2', 'Z_3', 'old_dep', 'youth_dep',
                'Z_1_x_kaopen', 'Z_2_x_kaopen', 'Z_3_x_kaopen']

    md = ["# Table 3: Portfolio Allocation Results\n"]

    # Summary: one row of fit statistics per model
    md.append("## Model Summary\n")
    md.append("| Model | Dep Var | N | Countries | R² | ρ |")
    md.append("|---|---|---|---|---|---|")
    for r in all_results:
        md.append(f"| {r['label']} | {r['dep_var']} | {r['n_obs']} "
                  f"| {r['n_countries']} | {r['r_squared']:.3f} | {r['rho']:.3f} |")

    # Key coefficients: one row per (model, key variable) actually estimated
    md.append("\n## Key Coefficients\n")
    md.append("| Model | Variable | Coef | SE | p-value | Sig |")
    md.append("|---|---|---|---|---|---|")
    for r in all_results:
        for var in key_vars:
            ckey = f'coef_{var}'
            if ckey in r:
                p = r[f'p_{var}']
                md.append(f"| {r['label']} | {var} | {r[ckey]:.4f} "
                          f"| {r[f'se_{var}']:.4f} | {p:.4f} | {stars(p)} |")

    md.append(f"\n*Controls: {', '.join(CONTROLS)}*")
    md.append("*PanelGLS with AR(1) correction, no fixed effects.*")
    md.append("*debt_share = debt_assets_gdp / gross_assets_gdp.*")
    md.append("*safe_share = bilateral debt to AA-or-above destinations / total bilateral debt (CPIS).*")

    # Create the output directory on demand so a fresh checkout does not lose
    # the results of an entire run to a FileNotFoundError at the last step.
    TABLES_DIR.mkdir(parents=True, exist_ok=True)
    out_path = TABLES_DIR / "table3_portfolio_allocation.md"
    # The table contains non-ASCII characters (R², ρ, →, ×); pin the encoding
    # instead of relying on the platform's locale default.
    out_path.write_text('\n'.join(md), encoding='utf-8')
    print(f"  Saved: {out_path}")


# Run the Phase 3 pipeline only when this file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    main()
